Here we include some examples and notes to understand how the current ArviZ KDE/bandwidth works. The notes are at the bottom of the document.
import numpy as np
from scipy import stats
from arviz_kde import fast_kde
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import sys
sys.path.append("../")
import src
from src.bandwidth import bw_scott
plt.rcParams["figure.figsize"] = (18, 14)
BLUE = "#3498db"
GREEN = "#27ae60"
np.random.seed(1995)
def bw_az(x):
bw = 4.5
x_len = len(x)
x_log_len = np.log(x_len) * bw
# Amount of bins is usually 200, but there is also a heuristic
n_points = 200
n_bins = min(int(x_len ** (1 / 3) * x_log_len * 2), n_points)
x_grid = np.linspace(x.min(), x.max(), num=n_bins)
return (x_log_len * x_len ** -0.2) * (x_grid[1] - x_grid[0])
distributions_dict = {
"norm": stats.norm,
"gamma": stats.gamma,
"lognorm": stats.lognorm,
"beta": stats.beta,
"t": stats.t,
"vonmises": stats.vonmises
}
pdf_kwargs = {
"gaussian_1": {
"distribution": ["norm"],
"params": [{"loc" : 0, "scale": 1}],
"wts": None,
"bc": False
},
"gmixture_1": {
"distribution": ["norm", "norm"],
"params": [{"loc": 0, "scale": 0.8}, {"loc": 8, "scale": 1}],
"wts": None,
"bc": False
},
"gmixture_2": {
"distribution": ["norm", "norm"],
"params": [{"loc": 0, "scale": 0.15}, {"loc": 5, "scale": 1.2}],
"wts": None,
"bc": False
},
"gamma_1": {
"distribution": ["gamma"],
"params": [{"a": 1, "scale": 1}],
"wts": None,
"bc": True
},
"gamma_2": {
"distribution": ["gamma"],
"params": [{"a": 2, "scale": 1}],
"wts": None,
"bc": True
},
"beta_1": {
"distribution": ["beta"],
"params": [{"a" : 2.5, "b" : 1.5}],
"wts": None,
"bc": True
},
"logn_1": {
"distribution": ["lognorm"],
"params": [{"s": 1, "scale": 1}],
"wts": None,
"bc": True
},
"logn_2": {
"distribution": ["lognorm"],
"params": [{"s": 0.7, "scale": 2}],
"wts": None,
"bc": True
},
"t_1": {
"distribution": ["t"],
"params": [{"df": 2}],
"wts": None,
"bc": False
},
"skwd_mixture1": {
"distribution": ["gamma", "norm", "norm"],
"params": [{"a": 1.5, "scale": 1}, {"loc": 5, "scale": 1}, {"loc": 8, "scale": 0.75}],
"wts": [0.7, 0.2, 0.1],
"bc": True
},
"vonmises": {
"distribution": ["vonmises"],
"params": [{"kappa": 2, "loc": np.pi}],
"wts": None,
"bc": True
},
}
def component_rvs(distribution, params, size):
f = distributions_dict[distribution](**params)
rvs = f.rvs(size)
if distribution == "vonmises" and params["loc"] == np.pi:
rvs[rvs > np.pi] = rvs[rvs > np.pi] - 2 * np.pi
return rvs
def mixture_rvs(distributions, params, size, wts=None):
if wts is None:
wts = np.repeat(1 / len(distributions), len(distributions))
assert len(distributions) == len(params) == len(wts)
assert np.allclose(np.sum(wts), 1)
sizes = np.round(np.array(wts) * size).astype(int)
map_obj = map(lambda d, p, s: component_rvs(d, p, s), distributions, params, sizes)
return np.concatenate(list(map_obj))
def distribution_bounds(distribution, params):
bounds = {
"norm" : lambda p: [p["loc"] - 3.5 * p["scale"], p["loc"] + 3.5 * p["scale"]],
"gamma": lambda p: [0, stats.gamma.ppf(0.995, a=p["a"], scale=p["scale"])],
"lognorm": lambda p: [0, stats.lognorm.ppf(0.995, s=p["s"], scale=p["scale"])],
"beta": lambda p: [0, 1],
"t": lambda p: [stats.t.ppf(0.01, df=p["df"]), stats.t.ppf(0.99, df=p["df"])],
"vonmises": lambda p: [-np.pi, np.pi]
}
return bounds[distribution](params)
def mixture_bounds(distributions, params):
map_obj = map(lambda d, p: distribution_bounds(d, p), distributions, params)
vals = np.concatenate(list(map_obj))
return [np.min(vals), np.max(vals)]
def mixture_grid(distributions, params):
bounds = mixture_bounds(distributions, params)
return np.linspace(bounds[0], bounds[1], num=500)
def component_pdf(distribution, params, grid):
f = distributions_dict[distribution](**params)
return f.pdf(grid)
def mixture_pdf(distributions, params, wts = None, grid = None, return_grid=False):
if wts is None:
wts = np.repeat(1 / len(distributions), len(distributions))
assert len(distributions) == len(params) == len(wts)
assert np.allclose(np.sum(wts), 1)
if grid is None:
grid = mixture_grid(distributions, params)
pdf = np.average(list((map(lambda d, p: component_pdf(d, p, grid), distributions, params))),
axis=0, weights=wts)
if return_grid:
return grid, pdf
else:
return pdf
def compare_bw(dist, sizes, niter=50):
kwargs = pdf_kwargs[dist]
distribution = kwargs["distribution"]
params = kwargs["params"]
wts = kwargs["wts"]
bc = kwargs["bc"]
grid, true_pdf = mixture_pdf(distribution, params, wts, return_grid=True)
f = plt.figure(constrained_layout=True)
gs = f.add_gridspec(3, 3)
ax1 = f.add_subplot(gs[0, 0])
ax2 = f.add_subplot(gs[0, 1], sharex = ax1, sharey=ax1)
ax3 = f.add_subplot(gs[0, 2], sharex = ax1, sharey=ax1)
ax4 = f.add_subplot(gs[1, 0], sharex = ax1, sharey=ax1)
ax5 = f.add_subplot(gs[1, 1], sharex = ax1, sharey=ax1)
ax6 = f.add_subplot(gs[1, 2], sharex = ax1, sharey=ax1)
ax7 = f.add_subplot(gs[2, :])
size_subset = [100, 1000, 5000]
for _ in range(niter):
rvs = mixture_rvs(distribution, params, 100, wts)
x1, y1 = src.estimate_density(rvs, bw="scott", extend=False, bound_correction=bc)
y2, xmin, xmax = fast_kde(rvs)
x2 = np.linspace(xmin, xmax, len(y2))
ax1.plot(x1, y1, lw=3, c=BLUE, alpha=0.35)
ax4.plot(x2, y2, lw=3, c=GREEN, alpha=0.35)
rvs = mixture_rvs(distribution, params, 1000, wts)
x1, y1 = src.estimate_density(rvs, bw="scott", extend=False, bound_correction=bc)
y2, xmin, xmax = fast_kde(rvs)
x2 = np.linspace(xmin, xmax, len(y2))
ax2.plot(x1, y1, lw=3, c=BLUE, alpha=0.35)
ax5.plot(x2, y2, lw=3, c=GREEN, alpha=0.35)
rvs = mixture_rvs(distribution, params, 5000, wts)
x1, y1 = src.estimate_density(rvs, bw="scott", extend=False, bound_correction=bc)
y2, xmin, xmax = fast_kde(rvs)
x2 = np.linspace(xmin, xmax, len(y2))
ax3.plot(x1, y1, lw=3, c=BLUE, alpha=0.35)
ax6.plot(x2, y2, lw=3, c=GREEN, alpha=0.35)
ax1.plot(grid, true_pdf, lw=5, ls="--", c="black")
ax2.plot(grid, true_pdf, lw=5, ls="--", c="black")
ax3.plot(grid, true_pdf, lw=5, ls="--", c="black")
ax4.plot(grid, true_pdf, lw=5, ls="--", c="black")
ax5.plot(grid, true_pdf, lw=5, ls="--", c="black")
ax6.plot(grid, true_pdf, lw=5, ls="--", c="black")
ax1.set_xlim(grid.min(), grid.max())
ax2.set_xlim(grid.min(), grid.max())
ax3.set_xlim(grid.min(), grid.max())
ax4.set_xlim(grid.min(), grid.max())
ax5.set_xlim(grid.min(), grid.max())
ax6.set_xlim(grid.min(), grid.max())
ax1.set_title("Size = 100")
ax4.set_title("Size = 100")
ax2.set_title("Size = 1000")
ax5.set_title("Size = 1000")
ax3.set_title("Size = 5000")
ax6.set_title("Size = 5000")
for _ in range(niter):
rvs_list = [mixture_rvs(distribution, params, size, wts) for size in sizes]
scott_vals = [bw_scott(rvs) for rvs in rvs_list]
az_vals = [bw_az(rvs) for rvs in rvs_list]
ax7.plot(sizes, scott_vals, lw=2, c=BLUE, alpha=0.35)
ax7.plot(sizes, az_vals, lw=2, c=GREEN, alpha=0.35)
ax7.set_xticks(sizes)
ax7.set_xlabel("Size")
ax7.set_ylabel("Bandwidth")
colors = [BLUE, GREEN]
lines = [Line2D([0], [0], color=c, linewidth=3) for c in colors]
labels = ["Scott's rule", "Arviz rule"]
ax7.legend(lines, labels)
sizes = [100, 200, 500, 1000, 2000, 5000]
compare_bw("gaussian_1", sizes, 40)
compare_bw("gmixture_1", sizes, 40)
compare_bw("gmixture_2", sizes, 40)
compare_bw("logn_1", sizes, 40)
compare_bw("logn_2", sizes, 40)
compare_bw("gamma_1", sizes, 40)
compare_bw("gamma_2", sizes, 40)
compare_bw("beta_1", sizes, 40)
compare_bw("t_1", sizes, 40)
compare_bw("skwd_mixture1", sizes, 40)
compare_bw("vonmises", sizes, 40)
These examples are not meant to demonstrate any superiority of Scott's rule over the ArviZ bandwidth. The goal is to study the ArviZ bandwidth rule, using a well-known rule such as Scott's purely for comparison.
The ArviZ bandwidth is computed as $\text{bw} = 4.5 \log(N) N^{-0.2}$. This value is not on the same scale as, for example, Scott's bandwidth, so the two cannot be compared directly. Instead, it is on the scale of the grid-relative bandwidth used when the Gaussian KDE is produced via binning and convolution.
To put the ArviZ bandwidth on the same scale as other rules/methods, we must multiply it by the bin width. In other words, the ArviZ bandwidth depends directly on the spacing of the grid used to compute the density estimate.
Next, some notes: